Take memory T-cells and annotate Th subtypes

.col <- c("tag", "celltype", "prob", "ln.prob")

round1.dt <-
    .fread("result/step2/assignment/round1.annot.gz", col.names=.col) %>%
    mutate(j = 1:n())

Adjusting the difference between mTreg and mTconv to magnify Th classes

mkdir -p result/step3/data

[ -f result/step3/data/Memory.mtx.gz ] || \
    mmutil_select_col \
        result/step1/matrix_final.mtx.gz \
        result/step1/matrix_final.cols.gz \
        result/step3/data/Memory.tags.gz \
        result/step3/data/Memory

We additionally controlled the difference between mTreg and mTconv to identify T-helper cell types based on common subtype definitions shared between the two types of memory T-cells.

.dt <- 
    .fread("result/step3/data/Memory.cols.gz", col.names="tag") %>%
    left_join(round1.dt) %>%
    as.data.table

.dt <-
    .dt[, c("barcode","batch") := tstrsplit(tag, "_")] %>%
    mutate(batch2 = celltype %&% "_" %&% batch) %>%
    as.data.table

.mkdir("result/step3/bbknn")
.fwrite(.dt[, .(batch2)], "result/step3/bbknn/Memory.batches.gz")
mkdir -p result/step3/bbknn

[ -f result/step3/bbknn/Memory.mtx.gz ] || \
    mmutil_bbknn \
        --mtx result/step3/data/Memory.mtx.gz \
        --col result/step3/data/Memory.cols.gz \
        --batch result/step3/bbknn/Memory.batches.gz \
        --knn 100 --rank 100 --log_scale \
        --out result/step3/bbknn/Memory
[ -f result/step3/assignment/Memory.annot.gz ] || \
    mmutil_annotate_col \
        --svd_u result/step3/bbknn/Memory.svd_U.gz \
        --svd_d result/step3/bbknn/Memory.svd_D.gz \
        --svd_v result/step3/bbknn/Memory.factors.gz \
        --row result/step1/matrix_final.rows.gz \
        --col result/step3/bbknn/Memory.cols.gz \
        --ann marker/surface_round2_positive.txt \
        --anti marker/surface_round2_negative.txt \
        --em_iter 1000 --kappa_max 100 --batch_size 500 --em_tol 1e-8 \
        --out result/step3/assignment/Memory
.col <- c("tag", "celltype.th", "prob.th", "ln.prob.th")
round2.dt <- .fread("result/step3/assignment/Memory.annot.gz", col.names=.col)

annot.dt <- left_join(round1.dt, round2.dt) %>%
    filter(!is.na(celltype.th)) %>%
    as.data.table
prot.raw.mtx <- Matrix::readMM("result/step2/protein.mtx.gz") %>% as.matrix()
rownames(prot.raw.mtx) <- .fread("result/step2/protein.rows.gz") %>% unlist
colnames(prot.raw.mtx) <- .fread("result/step2/protein.cols.gz") %>% unlist

features <- .fread("result/step1/matrix_final.rows.gz") %>% unlist
.idx <- which(str_starts(features, "anti_"))
.rows <- features[.idx]

U <- .fread("result/step3/bbknn/Memory.svd_U.gz") %>% as.matrix
U <- U[.idx, , drop = FALSE]
D <- .fread("result/step3/bbknn/Memory.svd_D.gz") %>% as.matrix
V <- .fread("result/step3/bbknn/Memory.factors.gz") %>% as.matrix

prot.mtx <- sweep(U, 2, D, `*`) %*% t(V)
prot.mtx <- scale(prot.mtx)
rownames(prot.mtx) <- .rows
colnames(prot.mtx) <- .fread("result/step3/bbknn/Memory.cols.gz") %>% unlist

Show global patterns after controlling the difference between the regulatory vs. conventional T-cell signatures

read.umap <- function(.xx, out.file, npc = Inf, ...) {
    dir.create(dirname(out.file), recursive=TRUE, showWarnings=FALSE)
    if(!file.exists(out.file)) {
        ## .xx <- .fread(svd.file) %>% as.matrix
        npc <- min(ncol(.xx), npc)
        .xx <- .xx[, 1:npc, drop = FALSE]
        .umap <- uwot::umap(.xx, 
                            n_threads = 15,
                            fast_sgd=TRUE,
                            verbose=TRUE,
                            ...)
        .fwrite(.umap, file=out.file)
        .umap.dt <- .read.mat(out.file)
    } else {
        .umap.dt <- .read.mat(out.file)
    }
    return(as.data.table(.umap.dt))
}

out.file <- "result/step3/bbknn/Memory.umap.gz"
## unlink(out.file)
.xx <- t(prot.mtx)
.key <- "anti_" %&% c("CD194","CD196","CD183","CD127","CD25","CD45RO","CD45RA")
.xx <- .xx[, .key, drop = FALSE]

.umap.dt <- read.umap(.xx, out.file, spread = 10)
.col.file <- "result/step3/bbknn/Memory.cols.gz"

.umap.dt <-
    cbind(.umap.dt, .fread(.col.file, col.names="tag")) %>%
    left_join(annot.dt) %>%
    mutate(v1.std = scale(V1), v2.std = scale(V2)) %>%
    filter(abs(v1.std) < 3, abs(v2.std) < 3)
.dt.show <-
    .umap.dt[celltype.th != "TFH" & celltype == "mTreg"] %>%
    mutate(celltype.th = str_replace(celltype.th, "TH", "mTreg"))

.aes <- aes(x = V1, y = V2, colour = celltype.th)

plt <- 
    .gg.plot(.dt.show[sample(nrow(.dt.show))], .aes) +
    ggtitle("mTreg subtypes") +
    geom_point(stroke = 0, size = .7) +
    scale_colour_brewer(palette = "Dark2") +
    xlab("UMAP 1") + ylab("UMAP 2")
print(plt)

.file <- fig.dir %&% "/Fig_round2_umap_mTreg.pdf"
.gg.save(filename = .file, plot = plt, width=3, height=3)

PDF

.dt.show <- 
    .umap.dt[celltype.th != "TFH" & celltype == "mTconv"]

.aes <- aes(x = V1, y = V2, colour = celltype.th)

plt <-
    .gg.plot(.dt.show[sample(nrow(.dt.show))], .aes) +
    ggtitle("mTconv subtypes") +
    geom_point(stroke = 0, size = .7) +
    scale_colour_brewer(palette = "Dark2") +
    xlab("UMAP 1") + ylab("UMAP 2")
print(plt)

.file <- fig.dir %&% "/Fig_round2_umap_mTconv.pdf"
.gg.save(filename = .file, plot = plt, width=3, height=3)

PDF

.dt.show <- .umap.dt[celltype.th != "TFH"]

.aes <- aes(x = V1, y = V2, colour = celltype.th)

plt <-
    .gg.plot(.dt.show[sample(nrow(.dt.show))], .aes) +
    ggtitle("Memory T-cells") +
    geom_point(stroke = 0, size = .7) +
    scale_colour_brewer(palette = "Dark2") +
    xlab("UMAP 1") + ylab("UMAP 2")
print(plt)

.file <- fig.dir %&% "/Fig_round2_umap.pdf"
.gg.save(filename = .file, plot = plt, width=3, height=3)

PDF

Memory Treg cells

.ct <- c("Treg17","Treg1/17","Treg1","Treg2")

.temp.annot <- annot.dt %>%
    filter(celltype == "mTreg") %>%
    mutate(celltype = str_replace(celltype.th, "TH", "Treg")) %>%
    as.data.table

plt <- plt.scatter.ct.2(.ct, .temp.annot, prot.raw.mtx)

print(plt)

.file <- fig.dir %&% "/Fig_round2_cdmarker_mTreg.pdf"
.gg.save(filename = .file, plot = plt, width=6, height=4)

PDF

.ct <- c("Treg17","Treg1/17","Treg1","Treg2")

.temp.annot <- annot.dt %>%
    filter(celltype == "mTreg") %>%
    mutate(celltype = str_replace(celltype.th, "TH", "Treg")) %>%
    as.data.table

plt <- plt.scatter.ct.2(.ct, .temp.annot, prot.raw.mtx, m2x="CD183", m2y="CD196")

print(plt)

.file <- fig.dir %&% "/Fig_round2_cdmarker_mTreg_2.pdf"
.gg.save(filename = .file, plot = plt, width=6, height=4)

PDF

.ct <- c("Treg17","Treg1/17","Treg1","Treg2")

.temp.annot <- annot.dt %>%
    mutate(j = 1:n()) %>% 
    filter(celltype == "mTreg") %>%
    mutate(celltype = str_replace(celltype.th, "TH", "Treg")) %>%
    as.data.table

plt <- plt.scatter.ct.2(.ct, .temp.annot, exp(prot.mtx))

print(plt)

.file <- fig.dir %&% "/Fig_round2_cdmarker_mTreg_bbknn.pdf"
.gg.save(filename = .file, plot = plt, width=6, height=4)

PDF

.ct <- c("Treg17","Treg1/17","Treg1","Treg2")

.temp.annot <- annot.dt %>%
    mutate(j = 1:n()) %>% 
    filter(celltype == "mTreg") %>%
    mutate(celltype = str_replace(celltype.th, "TH", "Treg")) %>%
    as.data.table

plt <- plt.scatter.ct.2(.ct, .temp.annot, exp(prot.mtx), m2x="CD183", m2y="CD196")

print(plt)

.file <- fig.dir %&% "/Fig_round2_cdmarker_mTreg_bbknn_2.pdf"
.gg.save(filename = .file, plot = plt, width=6, height=4)

PDF

Average profile for mTreg subtypes

.dt <- annot.dt %>% 
  filter(celltype == "mTreg") %>%
  select(tag)

.dt[, c("barcode", "batch") := tstrsplit(tag, "_")]

.hash.info <-
    readxl::read_xlsx("data/Hashing list MS Treg project.xlsx", 1) %>%
    mutate(hash = str_remove(hash,"#")) %>%
    mutate(hash = as.integer(hash)) %>%
    rename(Sample = `Cell type`)

.hash.dt <- fread("data/hashtag.tsv.gz", header=TRUE) %>%
    left_join(.hash.info)

.dt <- .dt %>%
    mutate(batch = as.integer(batch)) %>% 
    left_join(.hash.dt) %>%
    filter(!is.na(subject), !is.na(disease))

.mkdir("result/step3/markers")

.fwrite(.dt, file = "result/step3/markers/mTreg.cells.gz")
[ -f result/step3/markers/mTreg.mtx.gz ] || \
    mmutil_select_col \
        result/step1/matrix_final.mtx.gz \
        result/step1/matrix_final.cols.gz \
        result/step3/markers/mTreg.cells.gz \
        result/step3/markers/mTreg
.dt <- 
    .fread("result/step3/markers/mTreg.cols.gz", col.names="tag") %>%
    left_join(annot.dt) %>%
    mutate(celltype.treg = str_replace(celltype.th, "TH", "Treg")) %>%
    as.data.table

.dt[, c("barcode", "batch") := tstrsplit(tag, "_")]

.dt <- .dt %>% 
    mutate(batch = as.integer(batch)) %>% 
    left_join(.hash.dt) %>% 
    mutate(ind = subject) %>% 
    mutate(ind.dummy = "ind") %>% 
    as.data.table

.fwrite(.dt[, .(ind)], "result/step3/markers/mTreg.ind.gz")
.fwrite(.dt[, .(ind.dummy)], "result/step3/markers/mTreg.ind_dummy.gz")
.fwrite(.dt[, .(tag, celltype.treg)], "result/step3/markers/mTreg.annot.gz")
.fwrite(.dt[, .(celltype.treg)] %>% unique, "result/step3/markers/mTreg.label_names.gz")
[ -f result/step3/markers/mTreg.mean.gz ] || \
    mmutil_aggregate_col \
        --mtx result/step3/markers/mTreg.mtx.gz \
        --col result/step3/markers/mTreg.cols.gz \
        --annot result/step3/markers/mTreg.annot.gz \
        --lab result/step3/markers/mTreg.label_names.gz \
        --ind result/step3/markers/mTreg.ind.gz \
        --out result/step3/markers/mTreg
[ -f result/step3/markers/mTreg_tot.mean.gz ] || \
    mmutil_aggregate_col \
        --mtx result/step3/markers/mTreg.mtx.gz \
        --col result/step3/markers/mTreg.cols.gz \
        --annot result/step3/markers/mTreg.annot.gz \
        --lab result/step3/markers/mTreg.label_names.gz \
        --ind result/step3/markers/mTreg.ind_dummy.gz \
        --out result/step3/markers/mTreg_tot
.read.melt <- function(.file, .value, .row, .col) {
    .fread(.file, col.names = .col$col) %>%
        cbind(.row) %>%
        melt(id.vars = "gene", variable.name = "col", value.name = .value) %>%
        left_join(.col) %>%
        as.data.table
}

row.dt <- .fread("result/step1/matrix_final.rows.gz", col.names="gene")

.read.all.stats <- function(.hdr) {

    col.dt <- .fread(.hdr %&% ".mu_cols.gz", col.names="col")
    col.dt[, c("subject", "celltype") := tstrsplit(col, split="_")]

    .fun <- function(...) .read.melt(..., .row = row.dt, .col = col.dt)

    ret <-
        .fun(.hdr %&% ".sum.gz", "tot") %>%
        left_join(.fun(.hdr %&% ".mean.gz", "avg")) %>% 
        left_join(.fun(.hdr %&% ".mu.gz", "mu")) %>% 
        left_join(.fun(.hdr %&% ".mu_sd.gz", "mu.sd")) %>% 
        left_join(.fun(.hdr %&% ".ln_mu.gz", "log.mu")) %>% 
        left_join(.fun(.hdr %&% ".ln_mu_sd.gz", "log.mu.sd")) %>% 
        filter(celltype != "TFH") %>%
        select(-col) %>% 
        as.data.table

    ret[tot == 0, mu := NA]
    ret[tot == 0, log.mu := NA]
    ret[tot == 0, mu.sd := NA]
    ret[tot == 0, log.mu.sd := NA]

    return(ret)
}

.mkdir("Tab")
.file <- "Tab/Summary_mTreg_celltype.csv.gz"
if(!file.exists(.file)) {
    sum.dt <- .read.all.stats("result/step3/markers/mTreg")
    fwrite(sum.dt, .file, sep = ",", col.names=TRUE, na="NA", quote=FALSE)
}
sum.dt <- fread(.file, header=TRUE)

.file <- "Tab/Summary_mTreg_total_celltype.csv.gz"
if(!file.exists(.file)) {
    sum.tot.dt <- .read.all.stats("result/step3/markers/mTreg_tot") %>% 
        select(-subject)

    fwrite(sum.tot.dt, .file, sep = ",", col.names=TRUE, na="NA", quote=FALSE)
}
sum.tot.dt <- fread(.file, header=TRUE)

Heatmap for these marker genes

.genes <- c("MGAT4A", "ITM2C", "FHIT", "CDCA7L", "CD40LG", "LYAR", "GZMK", "GBP4", "IFNGR2", "CTSH", "C1orf162", "ABCA1", "NR1D1", "GATA3", "CXCR3", "anti_CD183", "anti_CD196", "anti_CD194", "anti_CD185")

.dt <- sum.tot.dt[gene %in% .genes]

.df <- .dt %>%
    mutate(row = celltype, col = gene) %>% 
    group_by(gene) %>% 
    mutate(weight = scale(pmax(log.mu, -4))) %>% 
    ungroup %>% 
    na.omit %>% 
    order.pair(ret.tab = TRUE) %>%
    mutate(tt = if_else(str_starts(gene, "anti_"), "protein", "gene"))

.gene.o <- unique(.df$col) %>% sort %>% as.character

plt <-
    .gg.plot(.df, aes(y = col, x = row, fill = exp(weight))) +
    facet_grid(tt ~ ., space="free", scales="free") +
    geom_tile(size = .1, colour = "gray20") +
    theme(axis.title = element_blank()) +
    scale_x_discrete(position = "top") +
    theme(axis.text.x = element_text(angle=90, vjust=0, hjust=0)) +
    theme(legend.position = "bottom") +
    scale_fill_distiller("normalized\nweights", palette = "YlGnBu", direction = 1)

print(plt)

.file <- fig.dir %&% "/Fig_mTreg_heatmap.pdf"
.gg.save(filename = .file, plot = plt, width=1.5, height=2.5)

PDF

.dt <- sum.dt[gene %in% .genes]

.df <- .dt %>%
    mutate(col = subject %&% "@" %&% celltype, row = gene) %>% 
    group_by(gene) %>% 
    mutate(weight = pmax(pmin(scale(pmax(log.mu, -4)), 1), -1)) %>% 
    ungroup %>% 
    na.omit %>% 
    col.order(.ro = .gene.o, ret.tab = TRUE) %>% 
    na.omit %>% 
    mutate(tt = if_else(str_starts(gene, "anti_"), "protein", "gene"))

plt <- 
    .gg.plot(.df, aes(x = col, y = row, fill = exp(weight))) +
    facet_grid(tt ~ celltype, space="free", scales="free") +
    geom_tile(size = .1, colour = "gray20") +
    theme(axis.title = element_blank()) +
    scale_x_discrete(position = "top") +
    theme(axis.text.x = element_text(angle=90, vjust=0, hjust=0)) +
    theme(legend.position = "bottom") +
    scale_fill_distiller("normalized\nweights", palette = "YlGnBu", direction = 1)

print(plt)

.file <- fig.dir %&% "/Fig_mTreg_heatmap_ind.pdf"
.gg.save(filename = .file, plot = plt, width=5, height=3)

PDF

Box plots for each gene

show.gene <- function(g, sum.dt) {

    .dt <- sum.dt[gene == g]

    .comp <- list(c("Treg1", "Treg1/17"),
                  c("Treg1/17", "Treg17"),
                  c("Treg1", "Treg17"),              
                  c("Treg17", "Treg2"),
                  c("Treg1/17", "Treg2"),
                  c("Treg1", "Treg2"))

    .gg.plot(.dt, aes(x = celltype, y = avg, group = celltype)) +
        theme(axis.text.x = element_text(angle=90, vjust=.5, hjust=1)) +
        theme(axis.title.x = element_blank()) +
        geom_boxplot(outlier.stroke=0, outlier.size=0, size=.5, width = .5) +
        geom_jitter(aes(fill = celltype), stroke=.2, size = 1, pch = 21, height = 0, width = .2) +
        scale_fill_brewer(palette = "Dark2", guide=FALSE) +
        ggpubr::stat_compare_means(comparisons = .comp, size=2, method = "wilcox.test") +
        ylab("average " %&% g %&% " expression\nwithin each individual")
}
.genes <- c("MGAT4A", "ITM2C", "FHIT", "CDCA7L", "CD40LG", "LYAR", "GZMK", "GBP4", "IFNGR2", "CTSH", "C1orf162", "ABCA1", "NR1D1", "GATA3", "CXCR3", "anti_CD183", "anti_CD196", "anti_CD194", "anti_CD185")

for(g in .genes) {
    plt <- show.gene(g, sum.dt)
    print(plt)
    .file <- fig.dir %&% "/Fig_mTreg_marker_" %&% g %&% ".pdf"
    .gg.save(filename = .file, plot = plt, width=1.3, height=2)
}

PDF

PDF

PDF

PDF

PDF

PDF

PDF

PDF

PDF

PDF

PDF

PDF

PDF

PDF

PDF

PDF

PDF

PDF

PDF

Conventional memory T-cells

.ct <- c("TH17","TH1/17","TH1","TH2")

.temp.annot <- annot.dt %>%
    filter(celltype == "mTconv") %>%
    mutate(celltype = celltype.th) %>% 
    as.data.table

plt <- plt.scatter.ct.2(.ct, .temp.annot, prot.raw.mtx)

print(plt)

.file <- fig.dir %&% "/Fig_round2_cdmarker_mTconv.pdf"
.gg.save(filename = .file, plot = plt, width=6, height=4)

PDF

.ct <- c("TH17","TH1/17","TH1","TH2")

.temp.annot <- annot.dt %>%
    filter(celltype == "mTconv") %>%
    mutate(celltype = celltype.th) %>% 
    as.data.table

plt <- plt.scatter.ct.2(.ct, .temp.annot, prot.raw.mtx, m2x="CD183", m2y="CD196")

print(plt)

.file <- fig.dir %&% "/Fig_round2_cdmarker_mTconv_2.pdf"
.gg.save(filename = .file, plot = plt, width=6, height=4)

PDF

.ct <- c("TH17","TH1/17","TH1","TH2")

.temp.annot <- annot.dt %>%
    mutate(j = 1:n()) %>% 
    filter(celltype == "mTconv") %>%
    mutate(celltype = celltype.th) %>% 
    as.data.table

plt <- plt.scatter.ct.2(.ct, .temp.annot, exp(prot.mtx))

print(plt)

.file <- fig.dir %&% "/Fig_round2_cdmarker_mTconv_bbknn.pdf"
.gg.save(filename = .file, plot = plt, width=6, height=4)

PDF

.ct <- c("TH17","TH1/17","TH1","TH2")

.temp.annot <- annot.dt %>%
    mutate(j = 1:n()) %>% 
    filter(celltype == "mTconv") %>%
    mutate(celltype = celltype.th) %>% 
    as.data.table

plt <- plt.scatter.ct.2(.ct, .temp.annot, exp(prot.mtx), m2x="CD183", m2y="CD196")

print(plt)

.file <- fig.dir %&% "/Fig_round2_cdmarker_mTconv_bbknn_2.pdf"
.gg.save(filename = .file, plot = plt, width=6, height=4)

PDF

Average profile for mTconv subtypes

.dt <- annot.dt %>% 
  filter(celltype == "mTconv") %>%
  select(tag)

.dt[, c("barcode", "batch") := tstrsplit(tag, "_")]

.hash.info <-
    readxl::read_xlsx("data/Hashing list MS Treg project.xlsx", 1) %>%
    mutate(hash = str_remove(hash,"#")) %>%
    mutate(hash = as.integer(hash)) %>%
    rename(Sample = `Cell type`)

.hash.dt <- fread("data/hashtag.tsv.gz", header=TRUE) %>%
    left_join(.hash.info)

.dt <- .dt %>%
    mutate(batch = as.integer(batch)) %>% 
    left_join(.hash.dt) %>%
    filter(!is.na(subject), !is.na(disease))

.mkdir("result/step3/markers")

.fwrite(.dt, file = "result/step3/markers/mTconv.cells.gz")
[ -f result/step3/markers/mTconv.mtx.gz ] || \
    mmutil_select_col \
        result/step1/matrix_final.mtx.gz \
        result/step1/matrix_final.cols.gz \
        result/step3/markers/mTconv.cells.gz \
        result/step3/markers/mTconv
.dt <- 
    .fread("result/step3/markers/mTconv.cols.gz", col.names="tag") %>%
    left_join(annot.dt) %>%
    as.data.table

.dt[, c("barcode", "batch") := tstrsplit(tag, "_")]

.dt <- .dt %>% 
    mutate(batch = as.integer(batch)) %>% 
    left_join(.hash.dt) %>% 
    mutate(ind = subject) %>% 
    mutate(ind.dummy = "ind") %>% 
    as.data.table

.fwrite(.dt[, .(ind)], "result/step3/markers/mTconv.ind.gz")
.fwrite(.dt[, .(ind.dummy)], "result/step3/markers/mTconv.ind_dummy.gz")
.fwrite(.dt[, .(tag, celltype.th)], "result/step3/markers/mTconv.annot.gz")
.fwrite(.dt[, .(celltype.th)] %>% unique, "result/step3/markers/mTconv.label_names.gz")
[ -f result/step3/markers/mTconv.mean.gz ] || \
    mmutil_aggregate_col \
        --mtx result/step3/markers/mTconv.mtx.gz \
        --col result/step3/markers/mTconv.cols.gz \
        --annot result/step3/markers/mTconv.annot.gz \
        --lab result/step3/markers/mTconv.label_names.gz \
        --ind result/step3/markers/mTconv.ind.gz \
        --out result/step3/markers/mTconv
[ -f result/step3/markers/mTconv_tot.mean.gz ] || \
    mmutil_aggregate_col \
        --mtx result/step3/markers/mTconv.mtx.gz \
        --col result/step3/markers/mTconv.cols.gz \
        --annot result/step3/markers/mTconv.annot.gz \
        --lab result/step3/markers/mTconv.label_names.gz \
        --ind result/step3/markers/mTconv.ind_dummy.gz \
        --out result/step3/markers/mTconv_tot
.read.melt <- function(.file, .value, .row, .col) {
    .fread(.file, col.names = .col$col) %>%
        cbind(.row) %>%
        melt(id.vars = "gene", variable.name = "col", value.name = .value) %>%
        left_join(.col) %>%
        as.data.table
}

row.dt <- .fread("result/step1/matrix_final.rows.gz", col.names="gene")

.read.all.stats <- function(.hdr) {

    col.dt <- .fread(.hdr %&% ".mu_cols.gz", col.names="col")
    col.dt[, c("subject", "celltype") := tstrsplit(col, split="_")]

    .fun <- function(...) .read.melt(..., .row = row.dt, .col = col.dt)

    ret <-
        .fun(.hdr %&% ".sum.gz", "tot") %>%
        left_join(.fun(.hdr %&% ".mean.gz", "avg")) %>% 
        left_join(.fun(.hdr %&% ".mu.gz", "mu")) %>% 
        left_join(.fun(.hdr %&% ".mu_sd.gz", "mu.sd")) %>% 
        left_join(.fun(.hdr %&% ".ln_mu.gz", "log.mu")) %>% 
        left_join(.fun(.hdr %&% ".ln_mu_sd.gz", "log.mu.sd")) %>% 
        filter(celltype != "TFH") %>%
        select(-col) %>% 
        as.data.table

    ret[tot == 0, mu := NA]
    ret[tot == 0, log.mu := NA]
    ret[tot == 0, mu.sd := NA]
    ret[tot == 0, log.mu.sd := NA]

    return(ret)
}

.mkdir("Tab")
.file <- "Tab/Summary_mTconv_celltype.csv.gz"
if(!file.exists(.file)) {
    sum.dt <- .read.all.stats("result/step3/markers/mTconv")
    fwrite(sum.dt, .file, sep = ",", col.names=TRUE, na="NA", quote=FALSE)
}
sum.dt <- fread(.file, header=TRUE)

.file <- "Tab/Summary_mTconv_total_celltype.csv.gz"
if(!file.exists(.file)) {
    sum.tot.dt <- .read.all.stats("result/step3/markers/mTconv_tot") %>% 
        select(-subject)

    fwrite(sum.tot.dt, .file, sep = ",", col.names=TRUE, na="NA", quote=FALSE)
}
sum.tot.dt <- fread(.file, header=TRUE)

Heatmap for these marker genes

.genes <- c("MGAT4A", "ITM2C", "FHIT", "CDCA7L", "CD40LG", "LYAR", "GZMK", "GBP4", "IFNGR2", "CTSH", "C1orf162", "ABCA1", "NR1D1", "GATA3", "CXCR3", "anti_CD183", "anti_CD196", "anti_CD194", "anti_CD185")

.dt <- sum.tot.dt[gene %in% .genes]

.df <- .dt %>%
    mutate(row = celltype, col = gene) %>% 
    group_by(gene) %>% 
    mutate(weight = scale(pmax(log.mu, -4))) %>% 
    ungroup %>% 
    na.omit %>% 
    order.pair(ret.tab = TRUE) %>%
    mutate(tt = if_else(str_starts(gene, "anti_"), "protein", "gene"))

.gene.o <- unique(.df$col) %>% sort %>% as.character

plt <- 
    .gg.plot(.df, aes(y = col, x = row, fill = exp(weight))) +
    facet_grid(tt ~ ., space="free", scales="free") +
    geom_tile(size = .1, colour = "gray20") +
    theme(axis.title = element_blank()) +
    scale_x_discrete(position = "top") +
    theme(axis.text.x = element_text(angle=90, vjust=0, hjust=0)) +
    theme(legend.position = "bottom") +
    scale_fill_distiller("normalized\nweights", palette = "YlGnBu", direction = 1)

print(plt)

.file <- fig.dir %&% "/Fig_mTconv_heatmap.pdf"
.gg.save(filename = .file, plot = plt, width=1.5, height=2.5)

PDF

.dt <- sum.dt[gene %in% .genes]

.df <- .dt %>%
    mutate(col = subject %&% "@" %&% celltype, row = gene) %>% 
    group_by(gene) %>% 
    mutate(weight = pmax(pmin(scale(pmax(log.mu, -4)), 1), -1)) %>% 
    ungroup %>% 
    na.omit %>% 
    col.order(.ro = .gene.o, ret.tab = TRUE) %>% 
    na.omit %>% 
    mutate(tt = if_else(str_starts(gene, "anti_"), "protein", "gene"))

plt <- 
    .gg.plot(.df, aes(x = col, y = row, fill = exp(weight))) +
    facet_grid(tt ~ celltype, space="free", scales="free") +
    geom_tile(size = .1, colour = "gray20") +
    theme(axis.title = element_blank()) +
    scale_x_discrete(position = "top") +
    theme(axis.text.x = element_text(angle=90, vjust=0, hjust=0)) +
    theme(legend.position = "bottom") +
    scale_fill_distiller("normalized\nweights", palette = "YlGnBu", direction = 1)

print(plt)

.file <- fig.dir %&% "/Fig_mTconv_heatmap_ind.pdf"
.gg.save(filename = .file, plot = plt, width=5, height=3)

PDF

Box plots for each gene

show.gene <- function(g, sum.dt) {

    .dt <- sum.dt[gene == g]

    .comp <- list(c("TH1", "TH1/17"),
                  c("TH1/17", "TH17"),
                  c("TH1", "TH17"),              
                  c("TH17", "TH2"),
                  c("TH1/17", "TH2"),
                  c("TH1", "TH2"))

    .gg.plot(.dt, aes(x = celltype, y = avg, group = celltype)) +
        theme(axis.text.x = element_text(angle=90, vjust=.5, hjust=1)) +
        theme(axis.title.x = element_blank()) +
        geom_boxplot(outlier.stroke=0, outlier.size=0, size=.5, width = .5) +
        geom_jitter(aes(fill = celltype), stroke=.2, size = 1, pch = 21, height = 0, width = .2) +
        scale_fill_brewer(palette = "Dark2", guide=FALSE) +
        ggpubr::stat_compare_means(comparisons = .comp, size=2, method = "wilcox.test") +
        ylab("average " %&% g %&% " expression\nwithin each individual")
}
.genes <- c("MGAT4A", "ITM2C", "FHIT", "CDCA7L", "CD40LG", "LYAR", "GZMK", "GBP4", "IFNGR2", "CTSH", "C1orf162", "ABCA1", "NR1D1", "GATA3", "CXCR3", "anti_CD183", "anti_CD196", "anti_CD194", "anti_CD185")

for(g in .genes) {
    plt <- show.gene(g, sum.dt)
    print(plt)
    .file <- fig.dir %&% "/Fig_mTconv_marker_" %&% g %&% ".pdf"
    .gg.save(filename = .file, plot = plt, width=1.3, height=2)
}

PDF

PDF

PDF

PDF

PDF

PDF

PDF

PDF

PDF

PDF

PDF

PDF

PDF

PDF

PDF

PDF

PDF

PDF

PDF